package com.yj.auto.plugin.lucene.utils;

import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.jfinal.kit.Prop;
import com.jfinal.kit.PropKit;
import com.jfinal.kit.StrKit;
import com.jfinal.log.Log;
import com.yj.auto.core.web.system.model.Atta;
import com.yj.auto.core.web.system.model.Config;
import com.yj.auto.core.web.system.service.ConfigService;
import com.yj.auto.helper.AutoHelper;
import com.yj.auto.plugin.lucene.model.AutoIndexSearcher;
import com.yj.auto.plugin.lucene.model.AutoIndexWriter;
import com.yj.auto.plugin.lucene.model.IndexModel;
import com.yj.auto.plugin.lucene.model.ResultModel;
import com.yj.auto.plugin.lucene.model.SearchModel;

/**
 * Static helpers around the Lucene full-text index of attachments ({@link Atta}):
 * building documents from attachment files, adding/updating/deleting them through an
 * {@link AutoIndexWriter}, and running highlighted, paged searches through an
 * {@link AutoIndexSearcher}.
 * <p>
 * The analyzer and the index directory are created lazily and cached in static fields.
 * NOTE(review): that lazy initialisation is not synchronized; confirm whether callers can
 * hit {@link #getAnalyzer()} / {@link #getDirectory()} concurrently on first use.
 */
public class LuceneUtil {
	private static final Log logger = Log.getLog(LuceneUtil.class);
	/** Name of the properties file holding the full-text search settings. */
	public static final String LUCENE_CONFIG = "conf/lucene.properties";
	/** Settings loaded from {@link #LUCENE_CONFIG}. */
	public final static Prop config = PropKit.use(LUCENE_CONFIG);

	/** Key of the system config entry that stores the id of the last processed attachment. */
	public final static String LAST_ATTA_KEY = "LUCENE_INDEX_LAST_ATTA_ID";
	/** File extensions that should be indexed (comma separated in the properties file). */
	public final static List<String> INDEX_FILE_EXT = Arrays.asList(config.get("LUCENE_INDEX_FILE_EXT", "txt,pdf,ppt,pptx,doc,docx,xls,xlsx,html,htm").split(","));
	/**
	 * Attachment types that should be indexed. When the property is absent the default
	 * {@code ""} is split, which yields a list containing a single empty string.
	 */
	public final static List<String> INDEX_ATTA_TYPE = Arrays.asList(config.get("LUCENE_INDEX_ATTA_TYPE", "").split(","));
	/** Index directory path. */
	public final static String LUCENE_INDEX_PATH = config.get("LUCENE_INDEX_PATH");
	/** HTML prefix placed before a highlighted term. */
	public final static String LUCENE_HIGHLIGHTER_PRETAG = config.get("LUCENE_HIGHLIGHTER_PRETAG");
	/** HTML suffix placed after a highlighted term. */
	public final static String LUCENE_HIGHLIGHTER_POSTTAG = config.get("LUCENE_HIGHLIGHTER_POSTTAG");
	/** Fragment size used when highlighting (default 80). */
	public final static int LUCENE_HIGHLIGHTER_FRAGMENT_SIZE = config.getInt("LUCENE_HIGHLIGHTER_FRAGMENT_SIZE", 80);
	/** Number of hits per page for paged searches (default 20). */
	public final static int LUCENE_PAGE_SIZE = config.getInt("LUCENE_PAGE_SIZE", 20);

	private static Directory indexDirectory = null;
	private static Analyzer analyzer = null;

	/**
	 * Looks up the config entry stored under {@link #LAST_ATTA_KEY}.
	 *
	 * @return whatever the config service's cache returns for that key
	 */
	public static Config getLastAttaConfig() {
		ConfigService configSrv = AutoHelper.getConfigService();
		return configSrv.getCache(LAST_ATTA_KEY);
	}

	/**
	 * Reads the id of the last processed attachment.
	 * <p>
	 * Exceptions raised while looking up the config entry are not caught here. A missing
	 * entry, a blank value or a non-numeric value all fall back to {@code 0}; the missing
	 * and non-numeric cases are logged instead of being silently swallowed.
	 *
	 * @return the stored id, or {@code 0} when none can be read
	 */
	public static Integer getLastAttaId() {
		Config lastAtta = getLastAttaConfig();
		if (null == lastAtta) {
			logger.error("config entry not found: " + LAST_ATTA_KEY);
			return 0;
		}
		String val = lastAtta.getVal();
		if (StrKit.isBlank(val)) {
			// Nothing stored yet; start from the beginning.
			return 0;
		}
		try {
			return Integer.parseInt(val);
		} catch (NumberFormatException e) {
			logger.error("config entry " + LAST_ATTA_KEY + " is not a number: " + val, e);
			return 0;
		}
	}

	/**
	 * Stores {@code lastId} as the value of the {@link #LAST_ATTA_KEY} config entry.
	 *
	 * @param lastId id of the last processed attachment
	 * @return the result of the config update, or {@code false} when the entry does not exist
	 */
	public static boolean updateLastAttaId(Integer lastId) {
		Config lastAtta = getLastAttaConfig();
		if (null == lastAtta) {
			logger.error("config entry not found: " + LAST_ATTA_KEY);
			return false;
		}
		lastAtta.setVal(String.valueOf(lastId));
		return lastAtta.update();
	}

	/**
	 * Tells whether the attachment's extension is one of {@link #INDEX_FILE_EXT}.
	 * <p>
	 * The comparison ignores case, matching the case-insensitive dispatch in
	 * {@link #text(Atta)}, and tolerates whitespace around the configured entries.
	 *
	 * @param model attachment to check
	 * @return {@code true} when the attachment should be indexed
	 */
	public static boolean indexFile(Atta model) {
		String ext = model.getExt();
		if (null == ext) {
			return false;
		}
		for (String allowed : INDEX_FILE_EXT) {
			if (allowed.trim().equalsIgnoreCase(ext)) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Builds the Lucene document for an attachment.
	 * <p>
	 * The attachment id and data id are stored and indexed with
	 * {@link IndexOptions#DOCS}; name and data type are stored only; the extracted text is
	 * stored and indexed as a {@link TextField}.
	 *
	 * @param model attachment to convert
	 * @return the document, or {@code null} when {@link #text(Atta)} yields no text
	 * @throws Exception if text extraction fails
	 */
	public static Document getDocument(Atta model) throws Exception {
		String content = text(model);
		if (null == content) {
			return null;
		}
		FieldType idType = new FieldType(TextField.TYPE_STORED);
		idType.setIndexOptions(IndexOptions.DOCS);

		Document doc = new Document();
		doc.add(new Field(IndexModel.IM_ID, model.getId().toString(), idType));
		doc.add(new StoredField(IndexModel.IM_NAME, model.getName()));
		doc.add(new Field(IndexModel.IM_DATA_ID, model.getDataId().toString(), idType));
		doc.add(new StoredField(IndexModel.IM_DATA_TYPE, model.getDataType()));
		doc.add(new TextField(IndexModel.IM_CONTENT, content, Field.Store.YES));
		return doc;
	}

	/**
	 * Adds the attachment to the index through the given writer.
	 *
	 * @param writer writer to use; it is not released here
	 * @param model attachment to index
	 * @return the writer's result, or {@code 0} when the attachment is skipped
	 *         (extension not indexed or no text extracted)
	 * @throws Exception if extraction or indexing fails
	 */
	public static long addDocument(AutoIndexWriter writer, Atta model) throws Exception {
		if (!indexFile(model)) {
			return 0;
		}
		Document doc = getDocument(model);
		if (null == doc) {
			return 0;
		}
		return writer.addDocument(doc);
	}

	/**
	 * Adds the attachment to the index using a fresh writer that is always released.
	 * Failures are logged and reported as {@code 0} rather than propagated.
	 *
	 * @param model attachment to index
	 * @return the writer's result, or {@code 0} when skipped or on failure
	 * @throws Exception if the writer cannot be created or released
	 */
	public static long addDocument(Atta model) throws Exception {
		AutoIndexWriter writer = new AutoIndexWriter();
		long seq = 0;
		try {
			seq = addDocument(writer, model);
		} catch (Exception e) {
			logger.error("add document error", e);
		} finally {
			writer.release();
		}
		return seq;
	}

	/**
	 * Replaces the indexed document whose {@link IndexModel#IM_ID} term equals the
	 * attachment id.
	 *
	 * @param writer writer to use; it is not released here
	 * @param model attachment to re-index
	 * @return the writer's result, or {@code 0} when the attachment is skipped
	 * @throws Exception if extraction or indexing fails
	 */
	public static long updateDocument(AutoIndexWriter writer, Atta model) throws Exception {
		if (!indexFile(model)) {
			return 0;
		}
		Document doc = getDocument(model);
		if (null == doc) {
			return 0;
		}
		Term idTerm = new Term(IndexModel.IM_ID, model.getId().toString());
		return writer.updateDocument(idTerm, doc);
	}

	/**
	 * Re-indexes the attachment using a fresh writer that is always released.
	 * Failures are logged and reported as {@code 0} rather than propagated.
	 *
	 * @param model attachment to re-index
	 * @return the writer's result, or {@code 0} when skipped or on failure
	 * @throws Exception if the writer cannot be created or released
	 */
	public static long updateDocument(Atta model) throws Exception {
		AutoIndexWriter writer = new AutoIndexWriter();
		long seq = 0;
		try {
			seq = updateDocument(writer, model);
		} catch (Exception e) {
			logger.error("update document error", e);
		} finally {
			writer.release();
		}
		return seq;
	}

	/**
	 * Deletes the indexed documents whose {@link IndexModel#IM_ID} term equals the
	 * attachment id.
	 *
	 * @param writer writer to use; it is not released here
	 * @param model attachment to remove from the index
	 * @return the writer's result, or {@code 0} when the extension is not an indexed one
	 * @throws Exception if the deletion fails
	 */
	public static long delDocument(AutoIndexWriter writer, Atta model) throws Exception {
		if (!indexFile(model)) {
			return 0;
		}
		Term idTerm = new Term(IndexModel.IM_ID, model.getId().toString());
		return writer.deleteDocuments(idTerm);
	}

	/**
	 * Removes the attachment from the index using a fresh writer that is always released.
	 * Failures are logged and reported as {@code 0} rather than propagated.
	 *
	 * @param model attachment to remove from the index
	 * @return the writer's result, or {@code 0} when skipped or on failure
	 * @throws Exception if the writer cannot be created or released
	 */
	public static long delDocument(Atta model) throws Exception {
		AutoIndexWriter writer = new AutoIndexWriter();
		long seq = 0;
		try {
			seq = delDocument(writer, model);
		} catch (Exception e) {
			logger.error("delete document error", e);
		} finally {
			writer.release();
		}
		return seq;
	}

	/**
	 * Extracts the text of an attachment, choosing the {@link DocumentUtil} converter by
	 * file extension (case-insensitive).
	 *
	 * @param model attachment whose file is read from its absolute path
	 * @return the extracted text, or {@code null} when the extension is not supported
	 * @throws Exception if the converter fails
	 */
	public static String text(Atta model) throws Exception {
		String ext = model.getExt();
		String filePath = model.getAbsolutePath();
		String text = null;
		if ("txt".equalsIgnoreCase(ext)) {
			text = DocumentUtil.txt2String(filePath);
		} else if ("pdf".equalsIgnoreCase(ext)) {
			text = DocumentUtil.pdf2String(filePath);
		} else if ("doc".equalsIgnoreCase(ext) || "docx".equalsIgnoreCase(ext)) {
			text = DocumentUtil.word2String(filePath);
		} else if ("xls".equalsIgnoreCase(ext) || "xlsx".equalsIgnoreCase(ext)) {
			text = DocumentUtil.excel2String(filePath);
		} else if ("ppt".equalsIgnoreCase(ext) || "pptx".equalsIgnoreCase(ext)) {
			text = DocumentUtil.ppt2String(filePath);
		} else if ("html".equalsIgnoreCase(ext) || "htm".equalsIgnoreCase(ext)) {
			text = DocumentUtil.html2String(filePath);
		}
		return text;
	}

	/**
	 * Returns the shared analyzer, creating it on first use.
	 *
	 * @return the cached {@link IKAnalyzer} (Chinese word segmentation), constructed with
	 *         {@code true}
	 */
	public static Analyzer getAnalyzer() {
		if (analyzer == null) {
			analyzer = new IKAnalyzer(true);
		}
		return analyzer;
	}

	/**
	 * Returns the shared index directory, opening it on first use.
	 * <p>
	 * NOTE(review): once a directory has been opened it is returned for every later call,
	 * so {@code indexPath} is only honoured by the first call.
	 *
	 * @param indexPath file-system path of the index, used only when nothing is cached yet
	 * @return the cached directory
	 * @throws Exception if the directory cannot be opened
	 */
	public static Directory getDirectory(String indexPath) throws Exception {
		if (indexDirectory == null) {
			indexDirectory = FSDirectory.open(Paths.get(indexPath));
		}
		return indexDirectory;
	}

	/**
	 * Returns the shared index directory for {@link #LUCENE_INDEX_PATH}.
	 *
	 * @return the cached directory
	 * @throws Exception if the directory cannot be opened
	 */
	public static Directory getDirectory() throws Exception {
		return getDirectory(LUCENE_INDEX_PATH);
	}

	/**
	 * Builds a multi-field query from the given field/keyword pairs. Every clause uses
	 * {@link Occur#SHOULD}, i.e. a match in any one field is enough
	 * ({@code MUST} would require a match, {@code MUST_NOT} would forbid one).
	 *
	 * @param list field/keyword pairs; keywords are parsed with the classic query syntax
	 * @return the parsed query
	 * @throws Exception if a keyword cannot be parsed
	 */
	public static Query getQuery(List<SearchModel> list) throws Exception {
		int len = list.size();
		String[] fields = new String[len];
		String[] keywords = new String[len];
		Occur[] occ = new Occur[len];
		for (int i = 0; i < len; i++) {
			SearchModel model = list.get(i);
			fields[i] = model.getField();
			keywords[i] = model.getKeyword();
			occ[i] = Occur.SHOULD;
		}
		return MultiFieldQueryParser.parse(keywords, fields, occ, getAnalyzer());
	}

	/**
	 * Runs a paged search and returns the hits with highlighted fragments.
	 * <p>
	 * For each hit the best fragment of every searched field is joined with {@code "..."};
	 * when no fragment is available the attachment name is used as content. Errors during
	 * the search are logged and the hits collected so far are returned. The searcher is
	 * always released.
	 * <p>
	 * NOTE(review): no highlighter {@code Encoder} is supplied, so fragment text is
	 * presumably emitted without HTML escaping; verify before rendering it as HTML.
	 *
	 * @param page page number, passed through to the searcher and stored in the result
	 * @param pageSize hits per page, passed through to the searcher
	 * @param list field/keyword pairs to search for
	 * @return the result; it holds no data when {@code list} is {@code null} or empty
	 * @throws Exception if the searcher cannot be created or released
	 */
	public static ResultModel search(int page, int pageSize, List<SearchModel> list) throws Exception {
		ResultModel result = new ResultModel();
		result.setPage(page);
		if (null == list || list.isEmpty()) {
			logger.error(" SearchModel is null  ");
			return result;
		}
		AutoIndexSearcher searcher = new AutoIndexSearcher();
		try {
			Query query = getQuery(list);
			TopDocs docs = searcher.search(page, pageSize, query);

			// Highlight with the configured tags; the fragmenter bounds each fragment's size.
			SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(LUCENE_HIGHLIGHTER_PRETAG, LUCENE_HIGHLIGHTER_POSTTAG);
			Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
			highlighter.setTextFragmenter(new SimpleFragmenter(LUCENE_HIGHLIGHTER_FRAGMENT_SIZE));
			Analyzer textAnalyzer = getAnalyzer();

			for (ScoreDoc hit : docs.scoreDocs) {
				Document doc = searcher.doc(hit.doc);
				Integer id = Integer.parseInt(doc.get(IndexModel.IM_ID));
				String name = doc.get(IndexModel.IM_NAME);
				Integer dataId = Integer.parseInt(doc.get(IndexModel.IM_DATA_ID));
				String dataType = doc.get(IndexModel.IM_DATA_TYPE);

				String content = joinBestFragments(highlighter, textAnalyzer, doc, list);
				if (StrKit.isBlank(content)) {
					// Nothing to highlight: fall back to the attachment name.
					content = name;
				}
				result.addData(new IndexModel(id, name, dataId, dataType, content));
			}
		} catch (Exception e) {
			logger.error("search  error:", e);
		} finally {
			searcher.release();
		}
		return result;
	}

	/**
	 * Joins the best highlighted fragment of every searched field with {@code "..."}.
	 * Fields without a stored value and fields for which the highlighter returns
	 * {@code null} are skipped, so the literal text "null" never ends up in the output.
	 */
	private static String joinBestFragments(Highlighter highlighter, Analyzer textAnalyzer, Document doc, List<SearchModel> list) throws Exception {
		StringBuilder joined = new StringBuilder();
		for (SearchModel sm : list) {
			String field = sm.getField();
			String stored = doc.get(field);
			if (null == stored) {
				continue;
			}
			String best = highlighter.getBestFragment(textAnalyzer, field, stored);
			if (null == best || best.isEmpty()) {
				continue;
			}
			if (joined.length() > 0) {
				joined.append("...");
			}
			joined.append(best);
		}
		return joined.toString();
	}

	/**
	 * Searches the content field for {@code keyword} using the configured page size
	 * ({@link #LUCENE_PAGE_SIZE}).
	 *
	 * @param page page number
	 * @param keyword text to search for in {@link IndexModel#IM_CONTENT}
	 * @return the search result
	 * @throws Exception if the searcher cannot be created or released
	 */
	public static ResultModel search(int page, String keyword) throws Exception {
		List<SearchModel> list = new ArrayList<SearchModel>();
		list.add(new SearchModel(IndexModel.IM_CONTENT, keyword));
		return search(page, LUCENE_PAGE_SIZE, list);
	}

}
